@comment{Journal article by Lotte, Haala, Karpina, Aragao and Shimabukuro
  (Remote Sensing, vol. 10, no. 9, 2018). Notes for maintainers: month uses
  the predefined three-letter macro so the style decides how it is printed;
  the percent sign in the abstract is escaped so the field can be typeset
  safely; urlaccessdate is a non-standard field and is kept verbatim.}
@Article{LotteHaaKarAraShi:2018:CaStUs,
  author        = {Lotte, Rodolfo Georjute and Haala, Norbert and Karpina, Mateusz
                   and Arag{\~a}o, Luiz Eduardo Oliveira e Cruz de and Shimabukuro,
                   Yosio Edemir},
  affiliation   = {{Instituto Nacional de Pesquisas Espaciais (INPE)} and {University
                   of Stuttgart} and {Wroclaw University of Environmental and Life
                   Sciences} and {Instituto Nacional de Pesquisas Espaciais (INPE)}
                   and {Instituto Nacional de Pesquisas Espaciais (INPE)}},
  title         = {{3D} Fa{\c{c}}ade Labeling over Complex Scenarios: A Case Study
                   Using Convolutional Neural Network and Structure-From-Motion},
  journal       = {Remote Sensing},
  year          = {2018},
  volume        = {10},
  number        = {9},
  pages         = {e1435},
  month         = sep,
  keywords      = {fa{\c{c}}ade feature detection, 3D reconstruction, deep-learning,
                   structure-from-motion},
  abstract      = {Urban environments are regions in which spectral variability and
                   spatial variability are extremely high, with a huge range of
                   shapes and sizes, and they also demand high resolution images for
                   applications involving their study. Due to the fact that these
                   environments can grow even more over time, applications related to
                   their monitoring tend to turn to autonomous intelligent systems,
                   which together with remote sensing data could help or even predict
                   daily life situations. The task of mapping cities by autonomous
                   operators was usually carried out by aerial optical images due to
                   its scale and resolution; however new scientific questions have
                   arisen, and this has led research into a new era of
                   highly-detailed data extraction. For many years, using artificial
                   neural models to solve complex problems such as automatic image
                   classification was commonplace, owing much of their popularity to
                   their ability to adapt to complex situations without needing human
                   intervention. In spite of that, their popularity declined in the
                   mid-2000s, mostly due to the complex and time-consuming nature of
                   their methods and workflows. However, newer neural network
                   architectures have brought back the interest in their application
                   for autonomous classifiers, especially for image classification
                   purposes. Convolutional Neural Networks (CNN) have been a trend
                   for pixel-wise image segmentation, showing flexibility when
                   detecting and classifying any kind of object, even in situations
                   where humans failed to perceive differences, such as in city
                   scenarios. In this paper, we aim to explore and experiment with
                   state-of-the-art technologies to semantically label 3D urban
                   models over complex scenarios. To achieve these goals, we split
                   the problem into two main processing lines: first, how to
                   correctly label the fa{\c{c}}ade features in the 2D domain, where
                   a supervised CNN is used to segment ground-based fa{\c{c}}ade
                   images into six feature classes, roof, window, wall, door, balcony
                   and shop; second, a Structure-from-Motion (SfM) and
                   Multi-View-Stereo (MVS) workflow is used to extract the geometry
                   of the fa{\c{c}}ade, wherein the segmented images in the previous
                   stage are then used to label the generated mesh by a reverse
                   ray-tracing technique. This paper demonstrates that the proposed
                   methodology is robust in complex scenarios. The fa{\c{c}}ade
                   feature inferences have reached up to 93\% accuracy over most of
                   the datasets used. Although it still presents some deficiencies in
                   unknown architectural styles and needs some improvements to be
                   made regarding 3D-labeling, we present a consistent and simple
                   methodology to handle the problem.},
  doi           = {10.3390/rs10091435},
  url           = {https://doi.org/10.3390/rs10091435},
  issn          = {2072-4292},
  language      = {en},
  targetfile    = {lotte_3d.pdf},
  urlaccessdate = {27 abr. 2024}
}